Prepare pbulk gene expression data for DE analysis

library(knitr)
library(SummarizedExperiment)
library(limma)
library(edgeR)
library(dplyr)
library(ggplot2)
library(BiocParallel)
library(doParallel)
param = SnowParam(32,"SOCK",progressbar = T)
register(param)
registerDoParallel()
options(future.globals.maxSize = 120 * 1024 ^3)

knitr::opts_chunk$set(echo = TRUE)
options(stringsAsFactors = FALSE)
opts_knit$set(echo = TRUE, message = FALSE, warning = FALSE, cache.lazy = FALSE, out.width=900)
opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, cache.lazy = FALSE, out.width=900)

output.folder <- file.path("output","CITE-seq")
dir.create(output.folder)
## Warning in dir.create(output.folder): 'output/CITE-seq' already exists
pseudobulk_list <- readRDS(file.path("data","covid_flu.CITEseq.pseudobulk.gene.expr.RDS"))

Filtering and normalization of count data, and create a batch-corrected gene expression dataset for visualization purpose

var.threshold <- 1
pseudobulk_list_normalized <- list()
pseudobulk_list_filtered <- list()
for (i in names(pseudobulk_list)) {
  cat(i,"-\n")
  eset <- pseudobulk_list[[i]]
  geneExpr <- eset$counts
  sample.info <- eset$samples
  sample.info$sample.id <- paste0(sample.info$alt.subject.id,".",sample.info$visit)
  
  # filter out lowly expressed genes
  cat("Number of genes with no counts across samples:\n")
  print(table(rowSums(eset$counts==0)==nrow(sample.info)))
  keep.exprs <- filterByExpr(eset,group = sample.info$visit.overall.group.sex, min.count = 2,min.prop=0.5)
  cat("Number of genes to keep by expr:",sum(keep.exprs),"\n")
  eset <- eset[keep.exprs,,keep.lib.sizes = F]
 
  # stage 1: normalize and correct
  eset <- calcNormFactors(eset)
  sample.info$visit.overall.group.sex <- ifelse(is.na(sample.info$visit.overall.group.sex),"Control",as.character(sample.info$visit.overall.group.sex))
  # a model that accommodate the control samples
  tmp.model <- model.matrix(~ 0 + visit.overall.group.sex,sample.info)
  vobj <- voom(eset ,design = tmp.model,plot = F)
  #title("",i)
  geneExpr <- vobj$E # normalized
  
  # PCA using baseline samples before correction
  baseline.samples <- subset(sample.info,!(visit %in% c("Day 1","Day 28")))
  baseline.geneExpr <- geneExpr[,rownames(baseline.samples)]
  baseline.geneExpr.pca <- prcomp(t(baseline.geneExpr),center = T,scale. = T)
  baseline.samples <- cbind(baseline.samples,baseline.geneExpr.pca$x[rownames(baseline.samples),1:2])
  print(ggplot(baseline.samples,aes(PC1,PC2)) + geom_point(aes(color=paste0(Batch),shape=alt.subject.id == "Control"),size=2,alpha=0.8) + 
          ggtitle(paste0("Before correction: ",i)))
  cat("Before correction:\n")
  print(car::Anova(lm(PC1 ~ n_barcodes + Batch,baseline.samples)))
  print(car::Anova(lm(PC2 ~ n_barcodes + Batch,baseline.samples)))
  
  # remove batch effect
  normalized.expr.batch.effect <- removeBatchEffect(geneExpr, batch = eset$samples$Batch, 
                                                    covariates = eset$samples$n_barcodes,
                                                    design = tmp.model)
  geneExpr <- normalized.expr.batch.effect
  eset$normalizedExpr <- geneExpr
  pseudobulk_list_normalized[[i]] <- eset
  
  # PCA using baseline samples after correction
  baseline.samples <- subset(sample.info,!(visit %in% c("Day 1","Day 28")))
  baseline.geneExpr <- geneExpr[,rownames(baseline.samples)]
  baseline.geneExpr.pca <- prcomp(t(baseline.geneExpr),center = T,scale. = T)
  baseline.samples <- cbind(baseline.samples,baseline.geneExpr.pca$x[rownames(baseline.samples),1:2])
  print(ggplot(baseline.samples,aes(PC1,PC2)) + geom_point(aes(color=paste0(Batch),shape=alt.subject.id == "Control"),size=2,alpha=0.8) + 
          ggtitle(paste0("After correction: ",i)))
  cat("After correction:\n")
  print(car::Anova(lm(PC1 ~ n_barcodes + Batch,baseline.samples)))
  print(car::Anova(lm(PC2 ~ n_barcodes + Batch,baseline.samples)))
  
  # stage 2: filter out genes by technical noise by leveraging repeats
  if (sum(table(sample.info$sample.id) > 1) > 0) {
    geneExpr.var <- apply(geneExpr,1,var)
    repeat.geneExpr.var <- data.frame(id=sample.info$sample.id,t(geneExpr)) %>% group_by(id) %>% summarise(across(where(is.numeric),var))
    repeat.geneExpr.var <- apply(as.data.frame(repeat.geneExpr.var[,-1]),2,mean,na.rm=T)
    plot(log2(repeat.geneExpr.var+1),log2(geneExpr.var+1),main=i)
    abline(coef=c(0,1),col="red")
    keep.var <- geneExpr.var > repeat.geneExpr.var*var.threshold
    cat("Number of genes to keep by var:",sum(keep.var),"\n")
    eset <- eset[keep.var,,keep.lib.sizes = F]
    geneExpr <- geneExpr[keep.var,]
    eset$normalizedExpr <- geneExpr
  }
    
  pseudobulk_list_filtered[[i]] <- eset
}
## B -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 22081  6321 
## Number of genes to keep by expr: 13286

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df  F value Pr(>F)    
## n_barcodes     71  1   0.7015 0.4071    
## Batch       32259  2 159.6085 <2e-16 ***
## Residuals    4143 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value  Pr(>F)  
## n_barcodes  2507.8  1  3.8744 0.05581 .
## Batch       1219.3  2  0.9418 0.39818  
## Residuals  26538.1 41                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value Pr(>F)
## n_barcodes    200  1  0.2503 0.6195
## Batch         233  2  0.1459 0.8647
## Residuals   32742 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes     0.2  1  0.0003 0.9854
## Batch          3.6  2  0.0029 0.9971
## Residuals  25504.4 41

## Number of genes to keep by var: 9344 
## B_Mem -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 20155  8247 
## Number of genes to keep by expr: 11707

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes 2635.5  1  14.635  0.000437 ***
## Batch      9141.5  2  25.381 6.722e-08 ***
## Residuals  7383.5 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   340.1  1  0.7094 0.4045
## Batch       1206.5  2  1.2583 0.2949
## Residuals  19657.2 41

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes   196.3  1  0.3565 0.5537
## Batch        426.1  2  0.3870 0.6815
## Residuals  22570.7 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes    65.5  1  0.1353 0.7149
## Batch        156.5  2  0.1615 0.8514
## Residuals  19860.3 41

## Number of genes to keep by var: 6045 
## B_Naive -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 20734  7668 
## Number of genes to keep by expr: 12097

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df  F value Pr(>F)    
## n_barcodes    31.2  1   0.4065 0.5273    
## Batch      28130.6  2 183.2146 <2e-16 ***
## Residuals   3147.6 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes  9377.9  1 23.1484 2.054e-05 ***
## Batch        790.0  2  0.9751    0.3857    
## Residuals  16610.0 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes  1071.2  1  1.7628 0.1916
## Batch        577.7  2  0.4754 0.6250
## Residuals  24914.8 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   177.0  1  0.3139 0.5784
## Batch         38.3  2  0.0340 0.9666
## Residuals  23114.5 41

## Number of genes to keep by var: 9436 
## B_Naive_Intermediate -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 18457  9945 
## Number of genes to keep by expr: 10295

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes  5613.9  1 22.0304 2.987e-05 ***
## Batch       3349.6  2  6.5724  0.003343 ** 
## Residuals  10447.9 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value   Pr(>F)   
## n_barcodes  2988.2  1  9.1398 0.004300 **
## Batch       3837.3  2  5.8684 0.005738 **
## Residuals  13404.7 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes  1147.9  1  2.0510 0.1597
## Batch        934.3  2  0.8346 0.4413
## Residuals  22947.0 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value  Pr(>F)  
## n_barcodes  1174.8  1  2.9229 0.09489 .
## Batch        895.2  2  1.1136 0.33812  
## Residuals  16479.6 41                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Number of genes to keep by var: 6274 
## CD4 -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 24592  3810 
## Number of genes to keep by expr: 16414

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value  Pr(>F)    
## n_barcodes    328  1   5.931 0.01931 *  
## Batch       79225  2 716.196 < 2e-16 ***
## Residuals    2268 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes 20211.1  1 33.0264 9.909e-07 ***
## Batch       3144.4  2  2.5691   0.08888 .  
## Residuals  25090.6 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes  19299  1 19.2062 7.961e-05 ***
## Batch         833  2  0.4147    0.6632    
## Residuals   41198 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value Pr(>F)
## n_barcodes   2502  1  2.6768 0.1095
## Batch         484  2  0.2592 0.7730
## Residuals   38316 41

## Number of genes to keep by var: 12577 
## CD4_CM -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 22678  5724 
## Number of genes to keep by expr: 13924

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df  F value Pr(>F)    
## n_barcodes     41  1   0.9274 0.3412    
## Batch       46150  2 517.4058 <2e-16 ***
## Residuals    1828 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes 19167.4  1 68.3917 2.831e-10 ***
## Batch       4094.5  2  7.3049  0.001934 ** 
## Residuals  11490.6 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value   Pr(>F)    
## n_barcodes 17553.6  1 39.3929 1.75e-07 ***
## Batch       1544.3  2  1.7328   0.1895    
## Residuals  18269.8 41                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   709.9  1  1.2340 0.2731
## Batch        468.4  2  0.4071 0.6682
## Residuals  23585.5 41

## Number of genes to keep by var: 10495 
## CD4_EM -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 21037  7365 
## Number of genes to keep by expr: 12514

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value  Pr(>F)    
## n_barcodes   324.4  1    4.27 0.04515 *  
## Batch      19929.8  2  131.16 < 2e-16 ***
## Residuals   3115.1 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value   Pr(>F)   
## n_barcodes  5819.3  1 11.4757 0.001567 **
## Batch       5150.5  2  5.0784 0.010704 * 
## Residuals  20790.9 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes  1170.7  1  1.5841 0.2153
## Batch        303.0  2  0.2050 0.8155
## Residuals  30299.5 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   568.5  1  0.9127 0.3450
## Batch        339.3  2  0.2724 0.7629
## Residuals  25539.9 41

## Number of genes to keep by var: 8040 
## CD4_Naive -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 23214  5188 
## Number of genes to keep by expr: 14862

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes   2362  1  41.939 9.101e-08 ***
## Batch       58024  2 515.197 < 2.2e-16 ***
## Residuals    2309 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes  20886  1 53.9884 5.346e-09 ***
## Batch        4451  2  5.7526  0.006279 ** 
## Residuals   15862 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value   Pr(>F)   
## n_barcodes  7155.1  1 10.1625 0.002743 **
## Batch         91.4  2  0.0649 0.937252   
## Residuals  28866.7 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes    32.4  1  0.0445 0.8341
## Batch       1234.4  2  0.8478 0.4357
## Residuals  29849.4 41

## Number of genes to keep by var: 10442 
## CD4_platelet_bind -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 18819  9583 
## Number of genes to keep by expr: 10420

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes  9597.6  1 31.8751 1.379e-06 ***
## Batch       3491.2  2  5.7974  0.006064 ** 
## Residuals  12345.1 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df  F value  Pr(>F)    
## n_barcodes   210.3  1   3.6266 0.06389 .  
## Batch      13348.3  2 115.0904 < 2e-16 ***
## Residuals   2377.6 41                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes    28.0  1  0.0613 0.8056
## Batch        509.9  2  0.5575 0.5769
## Residuals  18750.4 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)  
## n_barcodes  1553.1  1  4.5524 0.0389 *
## Batch        162.1  2  0.2376 0.7896  
## Residuals  13987.1 41                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Number of genes to keep by var: 6889 
## CD4_Tfh -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 16099 12303 
## Number of genes to keep by expr: 7391

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes 7328.5  1 41.6731 1.719e-07 ***
## Batch         8.0  2  0.0228    0.9775    
## Residuals  6330.8 36                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes 1567.2  1  9.9111  0.003295 ** 
## Batch      4402.5  2 13.9213 3.322e-05 ***
## Residuals  5692.4 36                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value Pr(>F)
## n_barcodes      7  1  0.0207 0.8865
## Batch         104  2  0.1527 0.8589
## Residuals   12260 36               
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value Pr(>F)
## n_barcodes    0.1  1  0.0003 0.9861
## Batch       120.2  2  0.2291 0.7964
## Residuals  9441.3 36

## Number of genes to keep by var: 4456 
## CD4_Treg -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 19005  9397 
## Number of genes to keep by expr: 10798

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes 11079.9  1  37.126 3.189e-07 ***
## Batch        365.3  2   0.612    0.5471    
## Residuals  12236.0 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes  1635.6  1   8.778  0.005056 ** 
## Batch      11819.0  2  31.715 4.744e-09 ***
## Residuals   7639.6 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value  Pr(>F)  
## n_barcodes  1687.6  1  3.5470 0.06676 .
## Batch        326.0  2  0.3425 0.71198  
## Residuals  19507.2 41                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   215.8  1  0.4588  0.502
## Batch        195.8  2  0.2081  0.813
## Residuals  19288.0 41

## Number of genes to keep by var: 6613 
## CD8 -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 23595  4807 
## Number of genes to keep by expr: 14934

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value   Pr(>F)   
## n_barcodes   7290  1  4.8730 0.032934 * 
## Batch       21213  2  7.0904 0.002267 **
## Residuals   61333 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes   3779  1  13.608 0.0006553 ***
## Batch       41476  2  74.677 2.143e-14 ***
## Residuals   11386 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value Pr(>F)
## n_barcodes    873  1  0.4143 0.5234
## Batch         988  2  0.2343 0.7922
## Residuals   86404 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value Pr(>F)
## n_barcodes      6  1  0.0060 0.9384
## Batch          71  2  0.0389 0.9619
## Residuals   37548 41

## Number of genes to keep by var: 10944 
## CD8_CM -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 20014  8388 
## Number of genes to keep by expr: 11511

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes  9707.2  1 32.0403 1.314e-06 ***
## Batch       4423.9  2  7.3009   0.00194 ** 
## Residuals  12421.8 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes  1060.3  1  3.1909   0.08145 .  
## Batch      14440.4  2 21.7286 3.681e-07 ***
## Residuals  13623.9 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes    29.7  1  0.0439 0.8351
## Batch        359.0  2  0.2649 0.7686
## Residuals  27775.4 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes    70.8  1  0.1296 0.7207
## Batch         38.8  2  0.0355 0.9651
## Residuals  22412.0 41

## Number of genes to keep by var: 7996 
## CD8_EM -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 21356  7046 
## Number of genes to keep by expr: 12441

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes 16138.5  1 28.4207 3.844e-06 ***
## Batch       4372.7  2  3.8503   0.02935 *  
## Residuals  23281.6 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes  3377.6  1  11.258  0.001717 ** 
## Batch      29767.9  2  49.613 1.127e-11 ***
## Residuals  12300.1 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value  Pr(>F)  
## n_barcodes   3350  1  3.7591 0.05943 .
## Batch        1226  2  0.6878 0.50837  
## Residuals   36534 41                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   215.8  1  0.2937 0.5908
## Batch        239.6  2  0.1630 0.8501
## Residuals  30123.0 41

## Number of genes to keep by var: 7351 
## CD8_EM:GPR56hi -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 19583  8819 
## Number of genes to keep by expr: 10402

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value   Pr(>F)   
## n_barcodes   7244  1  9.1612 0.004259 **
## Batch         407  2  0.2577 0.774095   
## Residuals   32417 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes    16.8  1  0.1015    0.7517    
## Batch      15694.0  2 47.3692 2.196e-11 ***
## Residuals   6791.9 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value Pr(>F)
## n_barcodes   1162  1  1.2699 0.2663
## Batch        1071  2  0.5852 0.5616
## Residuals   37502 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes    19.6  1  0.0398 0.8428
## Batch        472.0  2  0.4807 0.6218
## Residuals  20127.9 41

## Number of genes to keep by var: 5997 
## CD8_EM:GPR56lo -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 20473  7929 
## Number of genes to keep by expr: 11660

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes 16001.0  1 37.2709 3.067e-07 ***
## Batch       6556.6  2  7.6361  0.001517 ** 
## Residuals  17602.0 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes   523.5  1  3.8171   0.05758 .  
## Batch      25826.4  2 94.1636 4.707e-16 ***
## Residuals   5622.6 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value  Pr(>F)  
## n_barcodes  2976.4  1  4.3688 0.04285 *
## Batch        508.0  2  0.3728 0.69110  
## Residuals  27932.1 41                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   403.3  1  0.6833 0.4132
## Batch        442.6  2  0.3749 0.6897
## Residuals  24200.5 41

## Number of genes to keep by var: 6902 
## CD8_Naive -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 21424  6978 
## Number of genes to keep by expr: 12992

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value   Pr(>F)    
## n_barcodes 10547.8  1 16.6157 0.000205 ***
## Batch       4994.2  2  3.9336 0.027366 *  
## Residuals  26027.2 41                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df  F value Pr(>F)    
## n_barcodes    29.0  1   0.1931 0.6627    
## Batch      30686.0  2 102.1766 <2e-16 ***
## Residuals   6156.6 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value Pr(>F)
## n_barcodes    206  1  0.2185 0.6427
## Batch        1276  2  0.6772 0.5136
## Residuals   38635 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   585.4  1  0.9417 0.3375
## Batch        494.1  2  0.3974 0.6746
## Residuals  25486.2 41

## Number of genes to keep by var: 9229 
## CD8_proliferating -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 15952 12450 
## Number of genes to keep by expr: 9499

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value  Pr(>F)  
## n_barcodes  4174.3  1  5.4806 0.03094 *
## Batch        350.7  2  0.2302 0.79668  
## Residuals  13709.9 18                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   434.7  1  0.7710 0.3915
## Batch       1858.4  2  1.6481 0.2202
## Residuals  10148.7 18

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes   666.0  1  0.7121 0.4098
## Batch        802.6  2  0.4290 0.6576
## Residuals  16835.7 18               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   549.3  1  0.8367 0.3724
## Batch        371.6  2  0.2830 0.7568
## Residuals  11817.3 18

## Number of genes to keep by var: 6636 
## CD8_TEMRA -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 20200  8202 
## Number of genes to keep by expr: 11303

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes 11052.3  1 23.1575 2.048e-05 ***
## Batch       2593.4  2  2.7169   0.07798 .  
## Residuals  19567.9 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes   111.8  1   0.590    0.4468    
## Batch      18545.1  2  48.928 1.379e-11 ***
## Residuals   7770.1 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes   241.7  1  0.3674 0.5477
## Batch        308.2  2  0.2342 0.7922
## Residuals  26974.5 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes     3.3  1  0.0057 0.9404
## Batch        388.7  2  0.3349 0.7173
## Residuals  23788.4 41

## Number of genes to keep by var: 6790 
## CD8_TRM -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 17231 11171 
## Number of genes to keep by expr: 9112

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes 12005.0  1 39.8568 1.715e-07 ***
## Batch       1143.8  2  1.8988     0.163    
## Residuals  12048.1 40                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value  Pr(>F)    
## n_barcodes    34.4  1  0.4341  0.5137    
## Batch      10628.0  2 66.9987 1.7e-13 ***
## Residuals   3172.6 40                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes   512.2  1  1.1736 0.2851
## Batch        164.9  2  0.1889 0.8286
## Residuals  17457.5 40               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes     1.8  1  0.0058 0.9394
## Batch         13.3  2  0.0214 0.9789
## Residuals  12481.3 40

## Number of genes to keep by var: 5926 
## cDC -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 20665  7737 
## Number of genes to keep by expr: 12241

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df  F value Pr(>F)    
## n_barcodes     0.3  1   0.0101 0.9205    
## Batch      26364.6  2 444.1075 <2e-16 ***
## Residuals   1187.3 40                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes  7158.3  1 16.1734 0.0002491 ***
## Batch        510.3  2  0.5764 0.5664986    
## Residuals  17703.8 40                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value  Pr(>F)  
## n_barcodes  2913.9  1  5.0692 0.02992 *
## Batch        781.6  2  0.6799 0.51244  
## Residuals  22993.3 40                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes    48.5  1  0.0964 0.7578
## Batch        144.8  2  0.1439 0.8664
## Residuals  20120.5 40

## Number of genes to keep by var: 7013 
## gdT-Vd2 -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 19815  8587 
## Number of genes to keep by expr: 10989

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes 17330.9  1 28.7944 3.665e-06 ***
## Batch       2678.8  2  2.2254    0.1212    
## Residuals  24075.3 40                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes     5.5  1  0.0099 0.9214
## Batch        237.7  2  0.2129 0.8092
## Residuals  22328.1 40

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value Pr(>F)
## n_barcodes    255  1  0.3092 0.5813
## Batch         669  2  0.4053 0.6695
## Residuals   33023 40               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   103.6  1  0.1687 0.6835
## Batch        167.1  2  0.1360 0.8732
## Residuals  24569.2 40

## Number of genes to keep by var: 5455 
## HSPC -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 17215 11187 
## Number of genes to keep by expr: 10598

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value   Pr(>F)   
## n_barcodes 4159.0  1 12.7047 0.001943 **
## Batch      6074.5  2  9.2779 0.001411 **
## Residuals  6547.3 20                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   238.4  1  0.3711 0.5493
## Batch        540.1  2  0.4204 0.6625
## Residuals  12846.8 20

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes   677.1  1  0.9820 0.3335
## Batch        132.3  2  0.0959 0.9090
## Residuals  13790.9 20               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes     0.0  1  0.0000 0.9965
## Batch       1743.8  2  1.3694 0.2771
## Residuals  12733.5 20               
## ILC -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 13806 14596 
## Number of genes to keep by expr: 7465

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value   Pr(>F)   
## n_barcodes 5565.9  1 16.7964 0.001477 **
## Batch       182.1  2  0.2747 0.764418   
## Residuals  3976.5 12                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value Pr(>F)
## n_barcodes  136.2  1  0.2038 0.6597
## Batch       278.9  2  0.2087 0.8145
## Residuals  8019.3 12

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value  Pr(>F)  
## n_barcodes 1254.3  1  3.4266 0.08891 .
## Batch      3830.2  2  5.2317 0.02324 *
## Residuals  4392.7 12                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value Pr(>F)
## n_barcodes  213.9  1  0.3159 0.5844
## Batch      1046.5  2  0.7729 0.4833
## Residuals  8124.1 12

## Number of genes to keep by var: 5619 
## Mac-or-Mono -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 18956  9446 
## Number of genes to keep by expr: 9403

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes  408.5  1  3.2719   0.07839 .  
## Batch      6645.0  2 26.6106 5.944e-08 ***
## Residuals  4744.5 38                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value  Pr(>F)  
## n_barcodes  2154.7  1  4.3638 0.04346 *
## Batch       4026.8  2  4.0777 0.02487 *
## Residuals  18763.1 38                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes   358.8  1  0.5915 0.4466
## Batch        505.0  2  0.4162 0.6625
## Residuals  23051.4 38               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes     8.4  1  0.0182 0.8934
## Batch         16.1  2  0.0175 0.9826
## Residuals  17484.4 38

## Number of genes to keep by var: 6086 
## MAIT -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 20101  8301 
## Number of genes to keep by expr: 11448

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes  16578  1 16.2854 0.0002321 ***
## Batch        7967  2  3.9133 0.0278365 *  
## Residuals   41736 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   295.2  1  0.3934 0.5340
## Batch        595.4  2  0.3967 0.6751
## Residuals  30763.7 41

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value Pr(>F)  
## n_barcodes   4234  1  3.4360  0.071 .
## Batch        3200  2  1.2983  0.284  
## Residuals   50520 41                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value Pr(>F)
## n_barcodes    729  1  0.8943 0.3498
## Batch          34  2  0.0209 0.9794
## Residuals   33403 41

## Number of genes to keep by var: 6721 
## Mono -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 24334  4068 
## Number of genes to keep by expr: 15992

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes   1681  1  10.941  0.001964 ** 
## Batch       52854  2 171.994 < 2.2e-16 ***
## Residuals    6300 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value  Pr(>F)  
## n_barcodes   9064  1  7.2687 0.01013 *
## Batch        9582  2  3.8421 0.02955 *
## Residuals   51127 41                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value Pr(>F)
## n_barcodes   1163  1  0.6990 0.4080
## Batch           6  2  0.0019 0.9981
## Residuals   68220 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value Pr(>F)
## n_barcodes   2694  1  2.3374 0.1340
## Batch         213  2  0.0923 0.9121
## Residuals   47248 41

## Number of genes to keep by var: 12468 
## Mono_Classical -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 23665  4737 
## Number of genes to keep by expr: 15069

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes   9040  1  9.3335 0.0039451 ** 
## Batch       17593  2  9.0814 0.0005433 ***
## Residuals   39713 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes   4789  1  7.4918  0.009123 ** 
## Batch       39902  2 31.2112 5.787e-09 ***
## Residuals   26208 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value Pr(>F)
## n_barcodes   3562  1  1.9313 0.1721
## Batch          42  2  0.0114 0.9886
## Residuals   75622 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value Pr(>F)
## n_barcodes    887  1  0.7049  0.406
## Batch          41  2  0.0162  0.984
## Residuals   51581 41

## Number of genes to keep by var: 11499 
## Mono_Intermediate -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 21149  7253 
## Number of genes to keep by expr: 12147

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value   Pr(>F)    
## n_barcodes  9737.4  1 12.8837 0.000895 ***
## Batch       1854.4  2  1.2268 0.304037    
## Residuals  30231.8 40                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes  1229.4  1  15.585  0.000311 ***
## Batch      26708.1  2 169.291 < 2.2e-16 ***
## Residuals   3155.3 40                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value   Pr(>F)   
## n_barcodes   6745  1  7.4542 0.009366 **
## Batch          44  2  0.0246 0.975760   
## Residuals   36194 40                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   652.5  1  1.0892 0.3029
## Batch         20.6  2  0.0172 0.9829
## Residuals  23964.2 40

## Number of genes to keep by var: 8147 
## Mono_NonClassical -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 21087  7315 
## Number of genes to keep by expr: 12611

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes  1689.4  1  16.567 0.0002088 ***
## Batch      21141.7  2 103.658 < 2.2e-16 ***
## Residuals   4181.1 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes  4962.5  1  11.627 0.0014712 ** 
## Batch       7050.2  2   8.259 0.0009685 ***
## Residuals  17499.6 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes   319.2  1  0.4677 0.4979
## Batch        552.5  2  0.4048 0.6698
## Residuals  27982.0 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value  Pr(>F)  
## n_barcodes  2496.1  1  4.4909 0.04018 *
## Batch        670.5  2  0.6032 0.55186  
## Residuals  22788.6 41                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Number of genes to keep by var: 8204 
## Mono-T-dblt -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 19141  9261 
## Number of genes to keep by expr: 10254

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes 12986.3  1 33.7193 8.144e-07 ***
## Batch         56.5  2  0.0734    0.9294    
## Residuals  15790.3 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value  Pr(>F)  
## n_barcodes   368.4  1  1.3052 0.25990  
## Batch       2333.9  2  4.1344 0.02314 *
## Residuals  11572.4 41                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes     6.3  1  0.0108 0.9177
## Batch        157.9  2  0.1344 0.8746
## Residuals  24076.8 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value  Pr(>F)  
## n_barcodes  1129.7  1  3.1486 0.08342 .
## Batch          6.2  2  0.0087 0.99135  
## Residuals  14710.4 41                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Number of genes to keep by var: 6041 
## Neut -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 15883 12519 
## Number of genes to keep by expr: 6335

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes 3262.9  1 16.8497 0.0002606 ***
## Batch      1398.9  2  3.6118 0.0385146 *  
## Residuals  6196.8 32                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value   Pr(>F)   
## n_barcodes 2512.5  1 10.1421 0.003223 **
## Batch      1944.3  2  3.9241 0.029912 * 
## Residuals  7927.4 32                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes   822.8  1  2.5130 0.1227
## Batch        225.4  2  0.3442 0.7114
## Residuals  10477.9 32               
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value Pr(>F)
## n_barcodes  697.7  1  2.3484 0.1352
## Batch        67.6  2  0.1138 0.8928
## Residuals  9507.0 32

## Number of genes to keep by var: 3884 
## NK -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 22909  5493 
## Number of genes to keep by expr: 14234

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df  F value Pr(>F)    
## n_barcodes     29  1   0.2066 0.6518    
## Batch       58497  2 204.9108 <2e-16 ***
## Residuals    5852 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes  9422.1  1 14.9320 0.0003893 ***
## Batch        251.4  2  0.1992 0.8201974    
## Residuals  25870.9 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value  Pr(>F)  
## n_barcodes   3206  1  3.6900 0.06171 .
## Batch          13  2  0.0077 0.99236  
## Residuals   35618 41                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   676.4  1  0.8830 0.3529
## Batch        304.2  2  0.1986 0.8207
## Residuals  31405.0 41

## Number of genes to keep by var: 10902 
## NK_CD16hi -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 22656  5746 
## Number of genes to keep by expr: 13809

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value Pr(>F)    
## n_barcodes     56  1   0.403 0.5291    
## Batch       56044  2 199.950 <2e-16 ***
## Residuals    5746 41                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value   Pr(>F)   
## n_barcodes  5761.1  1  8.4537 0.005855 **
## Batch         68.2  2  0.0501 0.951223   
## Residuals  27940.9 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value  Pr(>F)  
## n_barcodes   2495  1  2.8743 0.09759 .
## Batch           4  2  0.0026 0.99744  
## Residuals   35592 41                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes    31.3  1  0.0417 0.8391
## Batch        477.5  2  0.3185 0.7290
## Residuals  30733.6 41

## Number of genes to keep by var: 10527 
## NK_CD56hiCD16lo -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 18299 10103 
## Number of genes to keep by expr: 9998

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value   Pr(>F)   
## n_barcodes  3351.2  1  9.7708 0.003254 **
## Batch       2316.7  2  3.3772 0.043883 * 
## Residuals  14062.3 41                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)    
## n_barcodes   164.1  1   2.711 0.1073    
## Batch      14784.7  2 122.145 <2e-16 ***
## Residuals   2481.4 41                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes     9.6  1  0.0213 0.8846
## Batch        259.2  2  0.2891 0.7504
## Residuals  18378.4 41               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes     0.3  1  0.0007 0.9793
## Batch         18.0  2  0.0224 0.9778
## Residuals  16427.7 41

## Number of genes to keep by var: 6488 
## NK_proliferating -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 16249 12153 
## Number of genes to keep by expr: 8594

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes 5088.7  1 17.0273 0.0002695 ***
## Batch      1506.2  2  2.5199 0.0973628 .  
## Residuals  8965.7 30                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value   Pr(>F)    
## n_barcodes  808.4  1  5.6104  0.02449 *  
## Batch      7071.8  2 24.5407 4.85e-07 ***
## Residuals  4322.5 30                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes   369.3  1  0.7951 0.3797
## Batch        301.0  2  0.3240 0.7257
## Residuals  13934.9 30               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   118.5  1  0.2846 0.5977
## Batch         18.6  2  0.0223 0.9780
## Residuals  12494.9 30

## Number of genes to keep by var: 4975 
## pDC -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 18978  9424 
## Number of genes to keep by expr: 10776

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes 4917.0  1  21.654 3.548e-05 ***
## Batch      4560.2  2  10.041 0.0002926 ***
## Residuals  9082.9 40                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes  4124.5  1  29.584 2.896e-06 ***
## Batch      12768.1  2  45.792 4.541e-11 ***
## Residuals   5576.6 40                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes   529.7  1  1.1665 0.2866
## Batch          2.3  2  0.0025 0.9975
## Residuals  18163.7 40               
## Anova Table (Type II tests)
## 
## Response: PC2
##             Sum Sq Df F value Pr(>F)
## n_barcodes   667.2  1  1.5508 0.2203
## Batch        435.4  2  0.5060 0.6067
## Residuals  17210.2 40

## Number of genes to keep by var: 6819 
## Plasmablast -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 15840 12562 
## Number of genes to keep by expr: 9451

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##            Sum Sq Df F value   Pr(>F)   
## n_barcodes 1291.3  1  2.6874 0.129404   
## Batch      7852.6  2  8.1714 0.006684 **
## Residuals  5285.4 11                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value Pr(>F)
## n_barcodes 1432.0  1  2.3918 0.1502
## Batch      2478.7  2  2.0700 0.1726
## Residuals  6585.9 11

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value Pr(>F)
## n_barcodes    23.3  1  0.0228 0.8828
## Batch       1706.9  2  0.8321 0.4608
## Residuals  11282.7 11               
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value Pr(>F)
## n_barcodes   11.8  1  0.0166 0.8998
## Batch      3490.4  2  2.4626 0.1307
## Residuals  7795.5 11               
## Platelet -
## Number of genes with no counts across samples:
## 
## FALSE  TRUE 
## 15772 12630 
## Number of genes to keep by expr: 5702

## Before correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value    Pr(>F)    
## n_barcodes 15148.3  1  49.876 1.346e-08 ***
## Batch       8068.6  2  13.283 3.569e-05 ***
## Residuals  12452.4 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value    Pr(>F)    
## n_barcodes 3893.0  1  47.666 2.251e-08 ***
## Batch      6061.8  2  37.110 6.320e-10 ***
## Residuals  3348.6 41                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## After correction:
## Anova Table (Type II tests)
## 
## Response: PC1
##             Sum Sq Df F value  Pr(>F)  
## n_barcodes  2417.5  1  4.7877 0.03442 *
## Batch        144.5  2  0.1431 0.86709  
## Residuals  20702.2 41                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Anova Table (Type II tests)
## 
## Response: PC2
##            Sum Sq Df F value  Pr(>F)  
## n_barcodes  947.5  1  4.8891 0.03266 *
## Batch       482.1  2  1.2437 0.29897  
## Residuals  7945.8 41                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Number of genes to keep by var: 3352
saveRDS(pseudobulk_list_normalized,file.path(output.folder,paste0("covid_flu.CITEseq.pseudobulk.gene.expr.normalized.RDS")))
saveRDS(pseudobulk_list_filtered,file.path(output.folder,paste0("covid_flu.CITEseq.pseudobulk.gene.expr.filtered.RDS")))

Session Info

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server 7.9 (Maipo)
## 
## Matrix products: default
## BLAS/LAPACK: /sysapps/cluster/software/Anaconda3/2020.07/envs/R-4.1.0/lib/libopenblasp-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] doParallel_1.0.16           iterators_1.0.13           
##  [3] foreach_1.5.1               BiocParallel_1.26.2        
##  [5] ggplot2_3.3.5               dplyr_1.0.8                
##  [7] edgeR_3.34.1                limma_3.48.3               
##  [9] SummarizedExperiment_1.22.0 Biobase_2.52.0             
## [11] GenomicRanges_1.44.0        GenomeInfoDb_1.28.1        
## [13] IRanges_2.26.0              S4Vectors_0.30.0           
## [15] BiocGenerics_0.38.0         MatrixGenerics_1.6.0       
## [17] matrixStats_0.61.0          knitr_1.33                 
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.0             jsonlite_1.7.2         carData_3.0-4         
##  [4] bslib_0.2.5.1          assertthat_0.2.1       highr_0.9             
##  [7] cellranger_1.1.0       GenomeInfoDbData_1.2.6 yaml_2.2.1            
## [10] pillar_1.6.1           lattice_0.20-44        glue_1.6.2            
## [13] digest_0.6.27          XVector_0.32.0         colorspace_2.0-2      
## [16] htmltools_0.5.1.1      Matrix_1.3-4           pkgconfig_2.0.3       
## [19] haven_2.4.1            zlibbioc_1.38.0        purrr_0.3.4           
## [22] scales_1.1.1           openxlsx_4.2.4         rio_0.5.27            
## [25] tibble_3.1.2           generics_0.1.0         farver_2.1.0          
## [28] car_3.0-11             ellipsis_0.3.2         withr_2.4.2           
## [31] cli_3.0.0              readxl_1.3.1           magrittr_2.0.1        
## [34] crayon_1.4.1           evaluate_0.14          fansi_0.4.2           
## [37] forcats_0.5.1          foreign_0.8-81         tools_4.1.0           
## [40] data.table_1.14.0      hms_1.1.0              lifecycle_1.0.1       
## [43] stringr_1.4.0          munsell_0.5.0          locfit_1.5-9.4        
## [46] zip_2.2.0              DelayedArray_0.18.0    compiler_4.1.0        
## [49] jquerylib_0.1.4        rlang_1.0.2            grid_4.1.0            
## [52] RCurl_1.98-1.3         bitops_1.0-7           labeling_0.4.2        
## [55] rmarkdown_2.9          gtable_0.3.0           codetools_0.2-18      
## [58] abind_1.4-5            DBI_1.1.1              curl_4.3.2            
## [61] R6_2.5.0               utf8_1.2.1             stringi_1.7.6         
## [64] Rcpp_1.0.7             vctrs_0.3.8            tidyselect_1.1.2      
## [67] xfun_0.24